Use the raw fastq files and generate quality plots to assess the quality of reads
Filter and trim out bad sequences and bases from our sequencing files
Write out fastq files with high quality sequences
Evaluate the quality from our filter and trim.
Infer errors on forward and reverse reads individually
Identify ASVs on forward and reverse reads separately using the error model.
Merge forward and reverse ASVs into “contiguous ASVs”.
Generate ASV count table. (otu_table input for
phyloseq.).
ASV count table: otu_table
Taxonomy table tax_table
Sample information: sample_table track the reads
lost throughout DADA2 workflow.
# Set the path to the raw, gzipped fastq sequencing files
# (all downstream file lists are built relative to this directory)
raw_fastqs_path <- "data/01_DADA2/01_raw_gzipped_fastqs"
# What files are in this path? (Intuition check)
list.files(raw_fastqs_path)## [1] "568_4_S245_L001_R1_001.fastq.gz" "568_4_S245_L001_R2_001.fastq.gz"
## [3] "568_5_S246_L001_R1_001.fastq.gz" "568_5_S246_L001_R2_001.fastq.gz"
## [5] "581_5_S247_L001_R1_001.fastq.gz" "581_5_S247_L001_R2_001.fastq.gz"
## [7] "611_5_S248_L001_R1_001.fastq.gz" "611_5_S248_L001_R2_001.fastq.gz"
## [9] "E03_5_S146_L001_R1_001.fastq.gz" "E03_5_S146_L001_R2_001.fastq.gz"
## [11] "E05_5_S147_L001_R1_001.fastq.gz" "E05_5_S147_L001_R2_001.fastq.gz"
## [13] "E1_4_S140_L001_R1_001.fastq.gz" "E1_4_S140_L001_R2_001.fastq.gz"
## [15] "E1_5_S141_L001_R1_001.fastq.gz" "E1_5_S141_L001_R2_001.fastq.gz"
## [17] "E2_4_S142_L001_R1_001.fastq.gz" "E2_4_S142_L001_R2_001.fastq.gz"
## [19] "E2_5_S143_L001_R1_001.fastq.gz" "E2_5_S143_L001_R2_001.fastq.gz"
## [21] "E3_4_S144_L001_R1_001.fastq.gz" "E3_4_S144_L001_R2_001.fastq.gz"
## [23] "E3_5_S145_L001_R1_001.fastq.gz" "E3_5_S145_L001_R2_001.fastq.gz"
## [25] "Neg1_S148_L001_R1_001.fastq.gz" "Neg1_S148_L001_R2_001.fastq.gz"
## [27] "Neg2_S249_L001_R1_001.fastq.gz" "Neg2_S249_L001_R2_001.fastq.gz"
## chr [1:28] "568_4_S245_L001_R1_001.fastq.gz" ...
# Create a vector of forward-read (R1) fastq file paths.
# list.files() treats `pattern` as a regular expression, so escape the
# dots and anchor with "$" to match only true "*_R1_001.fastq.gz"
# filenames (an unescaped "." matches any character).
forward_reads <- list.files(raw_fastqs_path,
                            pattern = "_R1_001\\.fastq\\.gz$",
                            full.names = TRUE)
#Intuition check
head(forward_reads)## [1] "data/01_DADA2/01_raw_gzipped_fastqs/568_4_S245_L001_R1_001.fastq.gz"
## [2] "data/01_DADA2/01_raw_gzipped_fastqs/568_5_S246_L001_R1_001.fastq.gz"
## [3] "data/01_DADA2/01_raw_gzipped_fastqs/581_5_S247_L001_R1_001.fastq.gz"
## [4] "data/01_DADA2/01_raw_gzipped_fastqs/611_5_S248_L001_R1_001.fastq.gz"
## [5] "data/01_DADA2/01_raw_gzipped_fastqs/E03_5_S146_L001_R1_001.fastq.gz"
## [6] "data/01_DADA2/01_raw_gzipped_fastqs/E05_5_S147_L001_R1_001.fastq.gz"
# Create a vector of reverse-read (R2) fastq file paths.
# Escape the dots and anchor the regex so only true "*_R2_001.fastq.gz"
# filenames are matched (mirrors the forward-read pattern above).
reverse_reads <- list.files(raw_fastqs_path,
                            pattern = "_R2_001\\.fastq\\.gz$",
                            full.names = TRUE)
#Intuition check
head(reverse_reads)## [1] "data/01_DADA2/01_raw_gzipped_fastqs/568_4_S245_L001_R2_001.fastq.gz"
## [2] "data/01_DADA2/01_raw_gzipped_fastqs/568_5_S246_L001_R2_001.fastq.gz"
## [3] "data/01_DADA2/01_raw_gzipped_fastqs/581_5_S247_L001_R2_001.fastq.gz"
## [4] "data/01_DADA2/01_raw_gzipped_fastqs/611_5_S248_L001_R2_001.fastq.gz"
## [5] "data/01_DADA2/01_raw_gzipped_fastqs/E03_5_S146_L001_R2_001.fastq.gz"
## [6] "data/01_DADA2/01_raw_gzipped_fastqs/E05_5_S147_L001_R2_001.fastq.gz"
## [1] FALSE
Let’s see the quality of the raw reads before we trim
# Randomly select 12 samples from the dataset to evaluate read quality.
# Selecting 12 gives a broader view than 2 (which we used in class for efficiency).
# seq_along() is safer than 1:length(x), which misbehaves (gives c(1, 0))
# when x is empty.
# NOTE(review): there is no set.seed() here, so the selection differs
# between runs -- add set.seed(<int>) above this line if the knitted
# output needs to be reproducible.
random_samples <- sample(seq_along(reverse_reads), size = 12)
random_samples## [1] 6 1 14 12 11 4 9 10 13 7 2 8
# Calculate and plot quality of these two samples
forward_filteredQual_plot_12 <- plotQualityProfile(forward_reads[random_samples]) +
labs(title = "Forward Read Raw Quality")
reverse_filteredQual_plot_12 <- plotQualityProfile(reverse_reads[random_samples]) +
labs(title = "Reverse Read Raw Quality")
# Plot them together with patchwork
forward_filteredQual_plot_12 + reverse_filteredQual_plot_12# vector of our samples, extract the sample information from our file
# Extract sample names ("<id>_<rep>") from the fastq filenames, e.g.
# "568_4_S245_L001_R1_001.fastq.gz" -> "568_4".
# vapply() is type-stable (always returns character()), unlike sapply(),
# whose return type silently changes on empty or ragged input.
samples <- vapply(strsplit(basename(forward_reads), "_"),
                  function(parts) paste(parts[1], parts[2], sep = "_"),
                  character(1))
#Intuition check
head(samples)## [1] "568_4" "568_5" "581_5" "611_5" "E03_5" "E05_5"
# Filtered reads will be written into filtered_fastqs_path
filtered_fastqs_path <- "data/01_DADA2/02_filtered_fastqs"
filtered_fastqs_path## [1] "data/01_DADA2/02_filtered_fastqs"
# Build the output filenames for the quality-filtered forward (R1) reads,
# one per sample, e.g. "568_4_R1_filtered.fastq.gz"
filtered_forward_reads <- file.path(filtered_fastqs_path, paste0(samples, "_R1_filtered.fastq.gz"))
#Intuition check
head(filtered_forward_reads)## [1] "data/01_DADA2/02_filtered_fastqs/568_4_R1_filtered.fastq.gz"
## [2] "data/01_DADA2/02_filtered_fastqs/568_5_R1_filtered.fastq.gz"
## [3] "data/01_DADA2/02_filtered_fastqs/581_5_R1_filtered.fastq.gz"
## [4] "data/01_DADA2/02_filtered_fastqs/611_5_R1_filtered.fastq.gz"
## [5] "data/01_DADA2/02_filtered_fastqs/E03_5_R1_filtered.fastq.gz"
## [6] "data/01_DADA2/02_filtered_fastqs/E05_5_R1_filtered.fastq.gz"
## [1] 14
# Build the matching output filenames for the filtered reverse (R2) reads
filtered_reverse_reads <- file.path(filtered_fastqs_path, paste0(samples, "_R2_filtered.fastq.gz"))
#Intuition check
head(filtered_reverse_reads)## [1] "data/01_DADA2/02_filtered_fastqs/568_4_R2_filtered.fastq.gz"
## [2] "data/01_DADA2/02_filtered_fastqs/568_5_R2_filtered.fastq.gz"
## [3] "data/01_DADA2/02_filtered_fastqs/581_5_R2_filtered.fastq.gz"
## [4] "data/01_DADA2/02_filtered_fastqs/611_5_R2_filtered.fastq.gz"
## [5] "data/01_DADA2/02_filtered_fastqs/E03_5_R2_filtered.fastq.gz"
## [6] "data/01_DADA2/02_filtered_fastqs/E05_5_R2_filtered.fastq.gz"
## [1] 14
Parameters of filter and trim DEPEND ON THE DATASET:
- maxN = the number of ambiguous (N) bases allowed. Here 0: remove all reads containing Ns.
- maxEE = quality-filtering threshold applied to the expected errors per read. If a read has at most maxEE expected errors, it is kept; if more, the read is thrown away. Mar recommends using c(1, 1) for the forward and reverse reads.
- trimLeft = trim a set number of base pairs from the start of each read.
- truncQ = truncate reads at the first instance of a quality score less than or equal to the selected number. We chose 2.
- rm.phix = remove reads matching the PhiX spike-in genome.
- compress = gzip the filtered output files.
- multithread = run on multiple threads.
#Assign a vector to filtered reads
#Trim out poor bases, none in this instance.
#Write out filtered fastq files
# Parameter summary (dataset-dependent!):
#   maxN = 0        -> discard any read containing an ambiguous base (N)
#   maxEE = c(1, 1) -> max expected errors allowed in forward / reverse reads
#   truncQ = 2      -> truncate at the first base with quality score <= 2
#   rm.phix = TRUE  -> remove PhiX spike-in reads
#   compress = TRUE -> gzip the output fastq files
# Returns a matrix of reads.in / reads.out per input file.
filtered_reads <-
filterAndTrim(fwd = forward_reads, filt = filtered_forward_reads,
rev = reverse_reads, filt.rev = filtered_reverse_reads,
maxN = 0, maxEE = c(1, 1),truncQ = 2, rm.phix = TRUE,
compress = TRUE, multithread = TRUE)
#These files are described in Kozich et al 2013 AEM
# Describes library prep
#Forward and reverse read have full overlap
# 515F and 806R# Plot the 12 random samples after QC
forward_filteredQual_plot_12 <-
plotQualityProfile(filtered_forward_reads[random_samples]) +
labs(title = "Trimmed Forward Read Quality")
reverse_filteredQual_plot_12 <-
plotQualityProfile(filtered_reverse_reads[random_samples]) +
labs(title = "Trimmed Reverse Read Quality")
# Put the two plots together
forward_filteredQual_plot_12 + reverse_filteredQual_plot_12filterAndTrim## reads.in reads.out
## 568_4_S245_L001_R1_001.fastq.gz 52502 19150
## 568_5_S246_L001_R1_001.fastq.gz 38610 12725
## 581_5_S247_L001_R1_001.fastq.gz 35144 11521
## 611_5_S248_L001_R1_001.fastq.gz 30511 9968
## E03_5_S146_L001_R1_001.fastq.gz 16548 5176
## E05_5_S147_L001_R1_001.fastq.gz 107092 29954
# calculate some stats
filtered_df %>%
reframe(median_reads_in = median(reads.in),
median_reads_out = median(reads.out),
median_percent_retained = (median(reads.out)/median(reads.in)))## median_reads_in median_reads_out median_percent_retained
## 1 45556 13554 0.2975239
Note every sequencing run needs to be run
separately! The error model MUST be run separately on
each illumina dataset. If you’d like to combine the datasets from
multiple sequencing runs, you’ll need to do the exact same
filterAndTrim() step AND, very importantly, you’ll
need to have the same primer and ASV length expected by the output.
Infer error rates for all possible transitions within purines and pyrimidines (A<>G or C<>T) and transversions between all purine and pyrimidine combinations.
Error model is learned by alternating estimation of the error rates and inference of sample composition until they converge.
## 50098734 total bases in 201111 reads from 14 samples will be used for learning the error rates.
#Plot forward reads errors
# NOTE(review): error_forward_reads is presumably the output of
# learnErrors(filtered_forward_reads, multithread = TRUE) from a chunk not
# shown here (only its printed output appears above) -- confirm it is
# defined before this point.
forward_error_plot <-
plotErrors(error_forward_reads, nominalQ = TRUE) +
labs(title = "Forward Read Error Model")
#Reverse reads
error_reverse_reads <-
learnErrors(filtered_reverse_reads, multithread = TRUE)## 50009687 total bases in 201111 reads from 14 samples will be used for learning the error rates.
#Plot reverse reads errors
# plotErrors() shows observed error rates (points), the fitted model
# (black line), and the nominal Q-score expectation (red line)
reverse_error_plot <-
plotErrors(error_reverse_reads, nominalQ = TRUE) +
labs(title = "Reverse Read Error Model")
# Put the two plots together
forward_error_plot + reverse_error_plot## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
Both the forward and reverse reads seem to fit the error models pretty well. A2T, C2T, G2T, and T2G seem to have slightly higher errors than expected.
Details of the plot: - Points: The observed error
rates for each consensus quality score.
- Black line: Estimated error rates after convergence
of the machine-learning algorithm.
- Red line: The error rates expected under the nominal
definition of the Q-score.
Similar to what is mentioned in the dada2 tutorial: the estimated error rates (black line) are a “reasonably good” fit to the observed rates (points), and the error rates drop with increased quality as expected. We can now infer ASVs!
An important note: This process occurs separately on forward and reverse reads! This is quite a different approach from how OTUs are identified in Mothur and also from UCHIME, oligotyping, and other OTU, MED, and ASV approaches.
#Infer forward ASVs
dada_forward <- dada(filtered_forward_reads,
err = error_forward_reads, multithread = TRUE)## Sample 1 - 19150 reads in 1583 unique sequences.
## Sample 2 - 12725 reads in 2727 unique sequences.
## Sample 3 - 11521 reads in 2598 unique sequences.
## Sample 4 - 9968 reads in 2155 unique sequences.
## Sample 5 - 5176 reads in 1666 unique sequences.
## Sample 6 - 29954 reads in 7383 unique sequences.
## Sample 7 - 27298 reads in 2742 unique sequences.
## Sample 8 - 18065 reads in 4331 unique sequences.
## Sample 9 - 24760 reads in 4490 unique sequences.
## Sample 10 - 22987 reads in 5517 unique sequences.
## Sample 11 - 4929 reads in 1212 unique sequences.
## Sample 12 - 14383 reads in 4549 unique sequences.
## Sample 13 - 23 reads in 18 unique sequences.
## Sample 14 - 172 reads in 124 unique sequences.
## [1] "list"
#Infer reverse ASVs
dada_reverse <- dada(filtered_reverse_reads,
err = error_reverse_reads, multithread = TRUE)## Sample 1 - 19150 reads in 3087 unique sequences.
## Sample 2 - 12725 reads in 5071 unique sequences.
## Sample 3 - 11521 reads in 4789 unique sequences.
## Sample 4 - 9968 reads in 3987 unique sequences.
## Sample 5 - 5176 reads in 2253 unique sequences.
## Sample 6 - 29954 reads in 10212 unique sequences.
## Sample 7 - 27298 reads in 4387 unique sequences.
## Sample 8 - 18065 reads in 6069 unique sequences.
## Sample 9 - 24760 reads in 6449 unique sequences.
## Sample 10 - 22987 reads in 8072 unique sequences.
## Sample 11 - 4929 reads in 1598 unique sequences.
## Sample 12 - 14383 reads in 6091 unique sequences.
## Sample 13 - 23 reads in 18 unique sequences.
## Sample 14 - 172 reads in 141 unique sequences.
## $`568_4_R2_filtered.fastq.gz`
## dada-class: object describing DADA2 denoising results
## 49 sequence variants were inferred from 3087 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## $Neg1_S148_R2_filtered.fastq.gz
## dada-class: object describing DADA2 denoising results
## 3 sequence variants were inferred from 18 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
Now, merge the forward and reverse ASVs into contigs.
# merge forward and reverse ASVs
merged_ASVs <- mergePairs(dada_forward, filtered_forward_reads,
dada_reverse, filtered_reverse_reads,
verbose = TRUE)## 17830 paired-reads (in 43 unique pairings) successfully merged out of 18951 (in 58 pairings) input.
## 11374 paired-reads (in 307 unique pairings) successfully merged out of 12044 (in 405 pairings) input.
## 10071 paired-reads (in 311 unique pairings) successfully merged out of 10817 (in 416 pairings) input.
## 8934 paired-reads (in 239 unique pairings) successfully merged out of 9375 (in 315 pairings) input.
## 4564 paired-reads (in 197 unique pairings) successfully merged out of 4794 (in 251 pairings) input.
## 28256 paired-reads (in 468 unique pairings) successfully merged out of 29015 (in 586 pairings) input.
## 26555 paired-reads (in 42 unique pairings) successfully merged out of 27146 (in 65 pairings) input.
## 17231 paired-reads (in 311 unique pairings) successfully merged out of 17564 (in 375 pairings) input.
## 23644 paired-reads (in 278 unique pairings) successfully merged out of 24148 (in 356 pairings) input.
## 22010 paired-reads (in 419 unique pairings) successfully merged out of 22449 (in 514 pairings) input.
## 4561 paired-reads (in 90 unique pairings) successfully merged out of 4645 (in 110 pairings) input.
## 13103 paired-reads (in 343 unique pairings) successfully merged out of 13654 (in 449 pairings) input.
## 9 paired-reads (in 3 unique pairings) successfully merged out of 9 (in 3 pairings) input.
## 85 paired-reads (in 17 unique pairings) successfully merged out of 85 (in 17 pairings) input.
## [1] "list"
## [1] 14
## [1] "568_4_R1_filtered.fastq.gz" "568_5_R1_filtered.fastq.gz"
## [3] "581_5_R1_filtered.fastq.gz" "611_5_R1_filtered.fastq.gz"
## [5] "E03_5_R1_filtered.fastq.gz" "E05_5_R1_filtered.fastq.gz"
## [7] "E1_4_R1_filtered.fastq.gz" "E1_5_R1_filtered.fastq.gz"
## [9] "E2_4_R1_filtered.fastq.gz" "E2_5_R1_filtered.fastq.gz"
## [11] "E3_4_R1_filtered.fastq.gz" "E3_5_R1_filtered.fastq.gz"
## [13] "Neg1_S148_R1_filtered.fastq.gz" "Neg2_S249_R1_filtered.fastq.gz"
# Create the ASV count table (rows = samples, columns = ASV sequences;
# values = read counts per ASV per sample)
raw_ASV_table <- makeSequenceTable(merged_ASVs)
# Write out the file to data/01_DADA2
# Check the type and dimensions of the data
dim(raw_ASV_table)## [1] 14 1391
## [1] "matrix" "array"
## [1] "integer"
# Inspect the distribution of sequence lengths of all ASVs in dataset
table(nchar(getSequences(raw_ASV_table)))##
## 252 253 254 255
## 207 1132 51 1
# Inspect the distribution of sequence lengths of all ASVs in dataset
# AFTER TRIM
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table))) %>%
ggplot(aes(x = Seq_Length )) +
geom_histogram() +
labs(title = "Raw distribution of ASV length")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
###################################################
###################################################
# TRIM THE ASVS
# Keep only ASVs of the expected amplicon length, 253 bp, allowing a
# +/- 1 bp wiggle (i.e. 252-254 bp). 253 originates from our expected
# amplicon.
# drop = FALSE guards the edge case where only one ASV survives the
# length filter, which would otherwise silently collapse the matrix to
# a plain vector and break downstream ncol()/colnames() calls.
raw_ASV_table_trimmed <- raw_ASV_table[, nchar(colnames(raw_ASV_table)) %in% 252:254, drop = FALSE]
# Inspect the distribution of sequence lengths of all ASVs in dataset
table(nchar(getSequences(raw_ASV_table_trimmed)))##
## 252 253 254
## 207 1132 51
## [1] 0.9999469
# Inspect the distribution of sequence lengths of all ASVs in dataset
# AFTER TRIM
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed))) %>%
ggplot(aes(x = Seq_Length )) +
geom_histogram() +
labs(title = "Trimmed distribution of ASV length")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Let's zoom in on the plot
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed))) %>%
ggplot(aes(x = Seq_Length )) +
geom_histogram() +
labs(title = "Trimmed distribution of ASV length") +
scale_y_continuous(limits = c(0, 500))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing missing values (`geom_bar()`).
Taking into account the lower, zoomed-in plot. Do we want to remove those extra ASVs?
No, we will leave the extra ASVs at 252 and 254 basepairs.
Sometimes chimeras arise in our workflow.
Chimeric sequences are artificial sequences formed by the combination of two or more distinct biological sequences. These chimeric sequences can arise during the polymerase chain reaction (PCR) amplification step of the 16S rRNA gene, where fragments from different templates can be erroneously joined together.
Chimera removal is an essential step in the analysis of 16S sequencing data to improve the accuracy of downstream analyses, such as taxonomic assignment and diversity assessment. It helps to avoid the inclusion of misleading or spurious sequences that could lead to incorrect biological interpretations.
# Remove the chimeras in the raw ASV table
noChimeras_ASV_table <- removeBimeraDenovo(raw_ASV_table_trimmed,
method="consensus",
multithread=TRUE, verbose=TRUE)## Identified 16 bimeras out of 1390 input sequences.
## [1] 14 1374
## [1] 0.9958133
## [1] 0.9957604
# Plot it
data.frame(Seq_Length_NoChim = nchar(getSequences(noChimeras_ASV_table))) %>%
ggplot(aes(x = Seq_Length_NoChim )) +
geom_histogram()+
labs(title = "Trimmed + Chimera Removal distribution of ASV length")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Here, we will look at the number of reads that were lost in the filtering, denoising, merging, and chimera removal.
# Helper: total number of sequences represented in a dada2-class object
# (the sum of the per-unique-sequence abundances from getUniques())
getN <- function(x) {
  abundances <- getUniques(x)
  sum(abundances)
}
# Make the table to track the seqs through each pipeline step.
# Columns: reads.in / reads.out (from filterAndTrim), then counts after
# forward denoising, reverse denoising, merging, and chimera removal.
# NOTE(review): cbind assumes every sample survived every step so the
# rows of all five pieces line up -- if any sample dropped to 0 reads
# during filtering, these lengths could differ; verify the alignment.
track <- cbind(filtered_reads,
sapply(dada_forward, getN),
sapply(dada_reverse, getN),
sapply(merged_ASVs, getN),
rowSums(noChimeras_ASV_table))
head(track)## reads.in reads.out
## 568_4_S245_L001_R1_001.fastq.gz 52502 19150 18964 18963 17830 17632
## 568_5_S246_L001_R1_001.fastq.gz 38610 12725 12313 12211 11374 11364
## 581_5_S247_L001_R1_001.fastq.gz 35144 11521 11126 10996 10071 10071
## 611_5_S248_L001_R1_001.fastq.gz 30511 9968 9594 9540 8934 8934
## E03_5_S146_L001_R1_001.fastq.gz 16548 5176 4909 4899 4564 4564
## E05_5_S147_L001_R1_001.fastq.gz 107092 29954 29345 29316 28256 28097
# Update column names to be more informative (most are missing at the moment!)
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")
# Use the short sample names (e.g. "568_4") instead of full fastq filenames
rownames(track) <- samples
# Generate a dataframe to track the reads through our DADA2 pipeline
track_counts_df <-
track %>%
# make it a dataframe
as.data.frame() %>%
# move the sample names from the rownames into a proper column
rownames_to_column(var = "names") %>%
# percent of the original input reads that survived the whole pipeline
mutate(perc_reads_retained = 100 * nochim / input)
# Visualize it in table format
DT::datatable(track_counts_df)# Plot it!
track_counts_df %>%
pivot_longer(input:nochim, names_to = "read_type", values_to = "num_reads") %>%
mutate(read_type = fct_relevel(read_type,
"input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")) %>%
ggplot(aes(x = read_type, y = num_reads, fill = read_type)) +
geom_line(aes(group = names), color = "grey") +
geom_point(shape = 21, size = 3, alpha = 0.8) +
scale_fill_brewer(palette = "Spectral") +
labs(x = "Filtering Step", y = "Number of Sequences") +
theme_bw()Here, we will use the silva database version 138!
taxa_train <-
assignTaxonomy(noChimeras_ASV_table,
"/workdir/in_class_data/taxonomy/silva_nr99_v138.1_train_set.fa.gz",
multithread=TRUE)
taxa_addSpecies <-
addSpecies(taxa_train,
"/workdir/in_class_data/taxonomy/silva_species_assignment_v138.1.fa.gz")
# Inspect the taxonomy
taxa_print <- taxa_addSpecies # Removing sequence rownames for display only
rownames(taxa_print) <- NULL
#View(taxa_print)Below, we will prepare the following:
ASV_fastas: A fasta file that we can use to build a
tree for phylogenetic analyses (e.g. phylogenetic alpha diversity
metrics or UNIFRAC dissimilarity).########### 2. COUNT TABLE ###############
############## Modify the ASV names and then save a fasta file! ##############
# Give headers more manageable names
# First pull the ASV sequences
asv_seqs <- colnames(noChimeras_ASV_table)
asv_seqs[1:5]## [1] "TACGGAGGGTGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAGCGAACAGG"
## [2] "TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG"
## [3] "TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG"
## [4] "TACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGG"
## [5] "TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG"
# make headers for our ASV seq fasta file, which will be our asv names
asv_headers <- vector(dim(noChimeras_ASV_table)[2], mode = "character")
asv_headers[1:5]## [1] "" "" "" "" ""
# Fill in the ASV names (">ASV_1", ">ASV_2", ...) in a single vectorized
# paste0() call -- equivalent to looping element-by-element, and
# seq_len() is safe even if the table had zero columns (unlike 1:n,
# which would yield c(1, 0) when n == 0)
asv_headers <- paste0(">ASV_", seq_len(ncol(noChimeras_ASV_table)))
# intuition check
asv_headers[1:5]## [1] ">ASV_1" ">ASV_2" ">ASV_3" ">ASV_4" ">ASV_5"
# Inspect the taxonomy table
#View(taxa_addSpecies)
##### Prepare tax table
# Move the ASV sequences from the rownames into a column named "ASVseqs"
# so the sequences are preserved when we later replace the rownames with
# the short ASV ids
new_tax_tab <-
taxa_addSpecies%>%
as.data.frame() %>%
rownames_to_column(var = "ASVseqs")
head(new_tax_tab)## ASVseqs
## 1 TACGGAGGGTGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAGCGAACAGG
## 2 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## 3 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## 4 TACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGG
## 5 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## 6 TACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCAAAACTGGGAGGCTCGAGTGCGGAAGAGGAGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACACTCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## Kingdom Phylum Class
## 1 Bacteria Bacteroidota Bacteroidia
## 2 Bacteria Proteobacteria Gammaproteobacteria
## 3 Bacteria Proteobacteria Gammaproteobacteria
## 4 Bacteria Firmicutes Clostridia
## 5 Bacteria Proteobacteria Gammaproteobacteria
## 6 Bacteria Proteobacteria Gammaproteobacteria
## Order Family Genus
## 1 Cytophagales Cyclobacteriaceae Aureibacter
## 2 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## 3 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## 4 Peptostreptococcales-Tissierellales Peptostreptococcaceae Romboutsia
## 5 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## 6 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## Species
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 sedimentorum
## 5 <NA>
## 6 <NA>
# Intuition check: the taxonomy rows must line up exactly with the
# count-table columns before we rename anything
stopifnot(new_tax_tab$ASVseqs == colnames(noChimeras_ASV_table))
# Now let's add the ASV names
# NOTE(review): asv_tab is not defined anywhere in this file as shown --
# it is presumably the count table with ASV_1, ASV_2, ... rownames built
# in a chunk not displayed here. Confirm it exists before this line.
rownames(new_tax_tab) <- rownames(asv_tab)
head(new_tax_tab)## ASVseqs
## ASV_1 TACGGAGGGTGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAGCGAACAGG
## ASV_2 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## ASV_3 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## ASV_4 TACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGG
## ASV_5 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## ASV_6 TACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCAAAACTGGGAGGCTCGAGTGCGGAAGAGGAGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACACTCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## Kingdom Phylum Class
## ASV_1 Bacteria Bacteroidota Bacteroidia
## ASV_2 Bacteria Proteobacteria Gammaproteobacteria
## ASV_3 Bacteria Proteobacteria Gammaproteobacteria
## ASV_4 Bacteria Firmicutes Clostridia
## ASV_5 Bacteria Proteobacteria Gammaproteobacteria
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria
## Order Family Genus
## ASV_1 Cytophagales Cyclobacteriaceae Aureibacter
## ASV_2 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## ASV_3 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## ASV_4 Peptostreptococcales-Tissierellales Peptostreptococcaceae Romboutsia
## ASV_5 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## ASV_6 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## Species
## ASV_1 <NA>
## ASV_2 <NA>
## ASV_3 <NA>
## ASV_4 sedimentorum
## ASV_5 <NA>
## ASV_6 <NA>
### Final prep of tax table. Add new column with ASV names
asv_tax <-
new_tax_tab %>%
# add rownames from count table for phyloseq handoff
# NOTE(review): relies on asv_tab (defined in a chunk not shown here)
# having ASV_1, ASV_2, ... rownames in the same order -- confirm upstream
mutate(ASV = rownames(asv_tab)) %>%
# Resort the columns with select: taxonomy ranks first, then ASV id and sequence
dplyr::select(Kingdom, Phylum, Class, Order, Family, Genus, Species, ASV, ASVseqs)
head(asv_tax)## Kingdom Phylum Class
## ASV_1 Bacteria Bacteroidota Bacteroidia
## ASV_2 Bacteria Proteobacteria Gammaproteobacteria
## ASV_3 Bacteria Proteobacteria Gammaproteobacteria
## ASV_4 Bacteria Firmicutes Clostridia
## ASV_5 Bacteria Proteobacteria Gammaproteobacteria
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria
## Order Family Genus
## ASV_1 Cytophagales Cyclobacteriaceae Aureibacter
## ASV_2 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## ASV_3 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## ASV_4 Peptostreptococcales-Tissierellales Peptostreptococcaceae Romboutsia
## ASV_5 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## ASV_6 Pseudomonadales Endozoicomonadaceae Endozoicomonas
## Species ASV
## ASV_1 <NA> ASV_1
## ASV_2 <NA> ASV_2
## ASV_3 <NA> ASV_3
## ASV_4 sedimentorum ASV_4
## ASV_5 <NA> ASV_5
## ASV_6 <NA> ASV_6
## ASVseqs
## ASV_1 TACGGAGGGTGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGTCTATTAAGTCAGTGGTGAAAGTTTGCAGCTTAACTGTAAAAGTGCCATTGATACTGGTAGACTTGAGTGTGGTGAAGGTAGGCGGAATTCGTGGTGTAGCGGTGAAATGCATAGATACCACGAAGAACACCGATAGCGAAGGCAGCTTACTGTACCATTACTGACGCTGAGGCACGAAAGCGTGGGGAGCGAACAGG
## ASV_2 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## ASV_3 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTTAACCTGGGAACGGCATCCAAAACTGAGAGGCTTGAATGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACATTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## ASV_4 TACGTAGGGGGCTAGCGTTATCCGGAATTACTGGGCGTAAAGGGTGCGTAGGTGGTTTCTTAAGTCAGAGGTGAAAGGCTACGGCTCAACCGTAGTAAGCCTTTGAAACTGAGAAACTTGAGTGCAGGAGAGGAGAGTAGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTAGGAGGAATACCAGTTGCGAAGGCGGCTCTCTGGACTGTAACTGACACTGAGGCACGAAAGCGTGGGGAGCAAACAGG
## ASV_5 TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGTCTGATAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGTCAGACTTGAGTGCGGAAGAGGGGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACATCAGTGGCGAAGGCGACACCCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
## ASV_6 TACGGAGGGTGCGAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGTAGGCGGCCTTTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACGGCATCCAAAACTGGGAGGCTCGAGTGCGGAAGAGGAGTGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACACTCTGGTCTGACACTGACGCTGAGGTACGAAAGCGTGGGGAGCAAACAGG
01_DADA2 filesNow, we will write the files! We will write the following to the
data/01_DADA2/ folder. We will save both as files that
could be submitted as supplements AND as .RData objects for easy loading
into the next steps into R.:
ASV_counts.tsv: ASV count table that has ASV names that
are re-written and shortened headers like ASV_1, ASV_2, etc, which will
match the names in our fasta file below. This will also be saved as
data/01_DADA2/ASV_counts.RData.ASV_counts_withSeqNames.tsv: This is generated with the
data object in this file known as noChimeras_ASV_table. ASV
headers include the entire ASV sequence ~250bps. In addition,
we will save this as a .RData object as
data/01_DADA2/noChimeras_ASV_table.RData as we will use
this data in analysis/02_Taxonomic_Assignment.Rmd to assign
the taxonomy from the sequence headers.ASVs.fasta: A fasta file output of the ASV names from
ASV_counts.tsv and the sequences from the ASVs in
ASV_counts_withSeqNames.tsv. A fasta file that we can use
to build a tree for phylogenetic analyses (e.g. phylogenetic alpha
diversity metrics or UNIFRAC dissimilarity). ASVs.fasta in
data/02_TaxAss_FreshTrain/ to be used for the taxonomy
classification in the next step in the workflow.track_read_counts.RData: To track how many reads we
lost throughout our workflow that could be used and plotted later. We
will add this to the metadata in
analysis/02_Taxonomic_Assignment.Rmd.# FIRST, we will save our output as regular files, which will be useful later on.
# Save to regular .tsv file
# Write BOTH the modified and unmodified ASV tables to a file!
# Write count table with ASV numbered names (e.g. ASV_1, ASV_2, etc)
# NOTE(review): asv_tab is built in a chunk not shown in this file --
# confirm it exists before writing.
write.table(asv_tab, "data/01_DADA2/ASV_counts.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write count table whose column headers are the full ASV sequences
write.table(noChimeras_ASV_table, "data/01_DADA2/ASV_counts_withSeqNames.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write out the fasta file for reference later on for what seq matches what ASV.
# rbind + c() interleaves the two vectors so lines alternate
# ">ASV_n" / sequence -- valid fasta line order.
asv_fasta <- c(rbind(asv_headers, asv_seqs))
# Save to a file!
write(asv_fasta, "data/01_DADA2/ASVs.fasta")
# SECOND, let's save the taxonomy tables
# Write the table
write.table(asv_tax, "data/01_DADA2/ASV_taxonomy.tsv", sep = "\t", quote = FALSE, col.names = NA)
# THIRD, let's save to a RData object
# Each of these files will be used in analysis/02_Taxonomic_Assignment
# RData objects are for easy loading :)
save(noChimeras_ASV_table, file = "data/01_DADA2/noChimeras_ASV_table.RData")
save(asv_tab, file = "data/01_DADA2/ASV_counts.RData")
# And save track_counts_df as an R object, which we will merge with the
# metadata in the next step of the analysis in
# analysis/02_Taxonomic_Assignment.
save(track_counts_df, file = "data/01_DADA2/track_read_counts.RData")##Session information
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.1 (2022-06-23)
## os Rocky Linux 9.0 (Blue Onyx)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-03-08
## pandoc 3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## ade4 1.7-22 2023-02-06 [1] CRAN (R 4.2.1)
## ape 5.7 2023-02-16 [2] CRAN (R 4.2.1)
## Biobase 2.56.0 2022-04-26 [2] Bioconductor
## BiocGenerics 0.42.0 2022-04-26 [2] Bioconductor
## BiocParallel 1.30.4 2022-10-11 [2] Bioconductor
## biomformat 1.26.0 2022-11-01 [1] Bioconductor
## Biostrings 2.64.1 2022-08-18 [2] Bioconductor
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.2.1)
## bslib 0.4.0 2022-07-16 [2] CRAN (R 4.2.1)
## cachem 1.0.6 2021-08-19 [2] CRAN (R 4.2.1)
## callr 3.7.5 2024-02-19 [1] CRAN (R 4.2.1)
## cli 3.6.2 2023-12-11 [1] CRAN (R 4.2.1)
## cluster 2.1.3 2022-03-28 [2] CRAN (R 4.2.1)
## codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.1)
## colorspace 2.0-3 2022-02-21 [2] CRAN (R 4.2.1)
## crayon 1.5.1 2022-03-26 [2] CRAN (R 4.2.1)
## crosstalk 1.2.0 2021-11-04 [2] CRAN (R 4.2.1)
## dada2 * 1.31.0 2024-02-21 [1] Github (benjjneb/dada2@e0029d0)
## data.table 1.14.8 2023-02-17 [2] CRAN (R 4.2.1)
## DelayedArray 0.22.0 2022-04-26 [2] Bioconductor
## deldir 2.0-2 2023-11-23 [2] CRAN (R 4.2.1)
## devtools * 2.4.5 2022-10-11 [1] CRAN (R 4.2.1)
## digest 0.6.34 2024-01-11 [1] CRAN (R 4.2.1)
## dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.2.1)
## DT * 0.32 2024-02-19 [1] CRAN (R 4.2.1)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.2.1)
## evaluate 0.23 2023-11-01 [1] CRAN (R 4.2.1)
## fansi 1.0.3 2022-03-24 [2] CRAN (R 4.2.1)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.2.1)
## fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.2.1)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.2.1)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.2.1)
## fs 1.5.2 2021-12-08 [2] CRAN (R 4.2.1)
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.2.1)
## GenomeInfoDb 1.32.4 2022-09-06 [2] Bioconductor
## GenomeInfoDbData 1.2.8 2022-10-15 [2] Bioconductor
## GenomicAlignments 1.32.1 2022-07-24 [2] Bioconductor
## GenomicRanges 1.48.0 2022-04-26 [2] Bioconductor
## ggplot2 * 3.4.4 2023-10-12 [2] CRAN (R 4.2.1)
## glue 1.6.2 2022-02-24 [2] CRAN (R 4.2.1)
## gtable 0.3.1 2022-09-01 [2] CRAN (R 4.2.1)
## highr 0.9 2021-04-16 [2] CRAN (R 4.2.1)
## hms 1.1.3 2023-03-21 [2] CRAN (R 4.2.1)
## htmltools 0.5.3 2022-07-18 [2] CRAN (R 4.2.1)
## htmlwidgets 1.5.4 2021-09-08 [2] CRAN (R 4.2.1)
## httpuv 1.6.5 2022-01-05 [2] CRAN (R 4.2.1)
## hwriter 1.3.2.1 2022-04-08 [1] CRAN (R 4.2.1)
## igraph 1.4.1 2023-02-24 [2] CRAN (R 4.2.1)
## interp 1.1-6 2024-01-26 [1] CRAN (R 4.2.1)
## IRanges 2.30.1 2022-08-18 [2] Bioconductor
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.2.1)
## jpeg 0.1-10 2022-11-29 [1] CRAN (R 4.2.1)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.2.1)
## jsonlite 1.8.8 2023-12-04 [1] CRAN (R 4.2.1)
## knitr 1.40 2022-08-24 [2] CRAN (R 4.2.1)
## labeling 0.4.2 2020-10-20 [2] CRAN (R 4.2.1)
## later 1.3.0 2021-08-18 [2] CRAN (R 4.2.1)
## lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.1)
## latticeExtra 0.6-30 2022-07-04 [1] CRAN (R 4.2.1)
## lifecycle 1.0.3 2022-10-07 [2] CRAN (R 4.2.1)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.2.1)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.2.1)
## MASS 7.3-57 2022-04-22 [2] CRAN (R 4.2.1)
## Matrix 1.6-5 2024-01-11 [2] CRAN (R 4.2.1)
## MatrixGenerics 1.8.1 2022-06-26 [2] Bioconductor
## matrixStats 0.62.0 2022-04-19 [2] CRAN (R 4.2.1)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.2.1)
## mgcv 1.8-40 2022-03-29 [2] CRAN (R 4.2.1)
## mime 0.12 2021-09-28 [2] CRAN (R 4.2.1)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.2.1)
## multtest 2.54.0 2022-11-01 [1] Bioconductor
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.2.1)
## nlme 3.1-157 2022-03-25 [2] CRAN (R 4.2.1)
## pacman 0.5.1 2019-03-11 [1] CRAN (R 4.2.1)
## patchwork * 1.2.0 2024-01-08 [1] CRAN (R 4.2.1)
## permute 0.9-7 2022-01-27 [1] CRAN (R 4.2.1)
## phyloseq * 1.41.1 2024-03-06 [1] Github (joey711/phyloseq@c260561)
## pillar 1.9.0 2023-03-22 [1] CRAN (R 4.2.1)
## pkgbuild 1.3.1 2021-12-20 [2] CRAN (R 4.2.1)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.2.1)
## pkgload 1.3.4 2024-01-16 [1] CRAN (R 4.2.1)
## plyr 1.8.8 2022-11-11 [2] CRAN (R 4.2.1)
## png 0.1-7 2013-12-03 [2] CRAN (R 4.2.1)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 4.2.1)
## processx 3.8.3 2023-12-10 [1] CRAN (R 4.2.1)
## profvis 0.3.7 2020-11-02 [2] CRAN (R 4.2.1)
## promises 1.2.0.1 2021-02-11 [2] CRAN (R 4.2.1)
## ps 1.7.6 2024-01-18 [1] CRAN (R 4.2.1)
## purrr * 1.0.1 2023-01-10 [2] CRAN (R 4.2.1)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.2.1)
## RColorBrewer 1.1-3 2022-04-03 [2] CRAN (R 4.2.1)
## Rcpp * 1.0.9 2022-07-08 [2] CRAN (R 4.2.1)
## RcppParallel 5.1.7 2023-02-27 [1] CRAN (R 4.2.1)
## RCurl 1.98-1.9 2022-10-03 [2] CRAN (R 4.2.1)
## readr * 2.1.4 2023-02-10 [2] CRAN (R 4.2.1)
## remotes 2.4.2 2021-11-30 [2] CRAN (R 4.2.1)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.2.1)
## rhdf5 2.40.0 2022-04-26 [2] Bioconductor
## rhdf5filters 1.8.0 2022-04-26 [2] Bioconductor
## Rhdf5lib 1.18.2 2022-05-15 [2] Bioconductor
## rlang 1.1.3 2024-01-10 [2] CRAN (R 4.2.1)
## rmarkdown 2.16 2022-08-24 [2] CRAN (R 4.2.1)
## Rsamtools 2.12.0 2022-04-26 [2] Bioconductor
## rstudioapi 0.14 2022-08-22 [2] CRAN (R 4.2.1)
## S4Vectors 0.34.0 2022-04-26 [2] Bioconductor
## sass 0.4.2 2022-07-16 [2] CRAN (R 4.2.1)
## scales 1.2.1 2022-08-20 [2] CRAN (R 4.2.1)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.2.1)
## shiny 1.7.2 2022-07-19 [2] CRAN (R 4.2.1)
## ShortRead 1.56.1 2022-11-18 [1] Bioconductor
## stringi 1.7.8 2022-07-11 [2] CRAN (R 4.2.1)
## stringr * 1.5.0 2022-12-02 [2] CRAN (R 4.2.1)
## SummarizedExperiment 1.26.1 2022-04-29 [2] Bioconductor
## survival 3.3-1 2022-03-03 [2] CRAN (R 4.2.1)
## tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.1)
## tidyr * 1.3.0 2023-01-24 [2] CRAN (R 4.2.1)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.2.1)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.2.1)
## timechange 0.3.0 2024-01-18 [1] CRAN (R 4.2.1)
## tzdb 0.4.0 2023-05-12 [2] CRAN (R 4.2.1)
## urlchecker 1.0.1 2021-11-30 [2] CRAN (R 4.2.1)
## usethis * 2.1.6 2022-05-25 [2] CRAN (R 4.2.1)
## utf8 1.2.2 2021-07-24 [2] CRAN (R 4.2.1)
## vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.2.1)
## vegan 2.6-4 2022-10-11 [1] CRAN (R 4.2.1)
## withr 2.5.0 2022-03-03 [2] CRAN (R 4.2.1)
## xfun 0.32 2022-08-10 [2] CRAN (R 4.2.1)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.2.1)
## XVector 0.36.0 2022-04-26 [2] Bioconductor
## yaml 2.3.5 2022-02-21 [2] CRAN (R 4.2.1)
## zlibbioc 1.42.0 2022-04-26 [2] Bioconductor
##
## [1] /home/cab565/R/x86_64-pc-linux-gnu-library/4.2
## [2] /programs/R-4.2.1-r9/library
##
## ──────────────────────────────────────────────────────────────────────────────